{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Infer schema directly, self correct and pick optimal query"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Imports "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from vertexai.language_models import CodeGenerationModel\n",
    "from google.cloud import bigquery\n",
    "import pandas as pd\n",
    "import logging \n",
    "import time\n",
    "import os "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Setup logging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "logger = logging.getLogger(__name__)\n",
    "logger.setLevel(logging.DEBUG)\n",
    "logger.addHandler(logging.StreamHandler())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Setup essentials"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Adjust display settings\n",
    "pd.set_option('display.max_colwidth', None)  # Set max column width to None to show all content\n",
    "pd.set_option('display.expand_frame_repr', False)  # Prevent truncation of DataFrame HTML representation\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "SERVICE_ACCOUNT_CREDENTIALS = './../credentials/vai-key.json'\n",
    "os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = SERVICE_ACCOUNT_CREDENTIALS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "PROJECT_ID = 'arun-genai-bb'\n",
    "CODE_GEN_MODEL_NAME = 'code-bison@latest'\n",
    "TEMPERATURE = 1 \n",
    "MAX_OUTPUT_TOKENS = 2048  # length of the output response | overridding the default value which is 128\n",
    "# TOP_P = 0.95  # default value\n",
    "# TOP_K = 40  # default value\n",
    "LOCATION = 'us-central1'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "DATASET = 'flight_reservations'\n",
    "TABLES = ['customers', 'flights', 'reservations', 'transactions', 'loyality_points']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "code_gen_model = CodeGenerationModel.from_pretrained(CODE_GEN_MODEL_NAME)\n",
    "bq_client = bigquery.Client()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "    SELECT *\n",
      "    FROM `arun-genai-bb.flight_reservations.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS`\n",
      "    WHERE table_name in (\"customers\",\"flights\",\"reservations\",\"transactions\",\"loyality_points\")\n",
      "\n"
     ]
    }
   ],
   "source": [
    "query = f\"\"\"\n",
    "    SELECT *\n",
    "    FROM `{PROJECT_ID}.{DATASET}.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS`\n",
    "    WHERE table_name in ({','.join([f'\"{table}\"' for table in TABLES])})\n",
    "\"\"\"\n",
    "logger.info(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>table_catalog</th>\n",
       "      <th>table_schema</th>\n",
       "      <th>table_name</th>\n",
       "      <th>column_name</th>\n",
       "      <th>field_path</th>\n",
       "      <th>data_type</th>\n",
       "      <th>description</th>\n",
       "      <th>collation_name</th>\n",
       "      <th>rounding_mode</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>transactions</td>\n",
       "      <td>transaction_id</td>\n",
       "      <td>transaction_id</td>\n",
       "      <td>INT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>transactions</td>\n",
       "      <td>reservation_id</td>\n",
       "      <td>reservation_id</td>\n",
       "      <td>INT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>transactions</td>\n",
       "      <td>amount</td>\n",
       "      <td>amount</td>\n",
       "      <td>FLOAT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>transactions</td>\n",
       "      <td>transaction_datetime</td>\n",
       "      <td>transaction_datetime</td>\n",
       "      <td>DATETIME</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>reservations</td>\n",
       "      <td>reservation_id</td>\n",
       "      <td>reservation_id</td>\n",
       "      <td>INT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>reservations</td>\n",
       "      <td>customer_id</td>\n",
       "      <td>customer_id</td>\n",
       "      <td>INT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>reservations</td>\n",
       "      <td>flight_id</td>\n",
       "      <td>flight_id</td>\n",
       "      <td>INT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>reservations</td>\n",
       "      <td>reservation_datetime</td>\n",
       "      <td>reservation_datetime</td>\n",
       "      <td>DATETIME</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>reservations</td>\n",
       "      <td>status</td>\n",
       "      <td>status</td>\n",
       "      <td>STRING</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>flights</td>\n",
       "      <td>flight_id</td>\n",
       "      <td>flight_id</td>\n",
       "      <td>INT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>flights</td>\n",
       "      <td>origin</td>\n",
       "      <td>origin</td>\n",
       "      <td>STRING</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>flights</td>\n",
       "      <td>destination</td>\n",
       "      <td>destination</td>\n",
       "      <td>STRING</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>flights</td>\n",
       "      <td>departure_datetime</td>\n",
       "      <td>departure_datetime</td>\n",
       "      <td>DATETIME</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>flights</td>\n",
       "      <td>arrival_datetime</td>\n",
       "      <td>arrival_datetime</td>\n",
       "      <td>DATETIME</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>flights</td>\n",
       "      <td>carrier</td>\n",
       "      <td>carrier</td>\n",
       "      <td>STRING</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>flights</td>\n",
       "      <td>price</td>\n",
       "      <td>price</td>\n",
       "      <td>FLOAT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>customers</td>\n",
       "      <td>customer_id</td>\n",
       "      <td>customer_id</td>\n",
       "      <td>INT64</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>customers</td>\n",
       "      <td>first_name</td>\n",
       "      <td>first_name</td>\n",
       "      <td>STRING</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>customers</td>\n",
       "      <td>last_name</td>\n",
       "      <td>last_name</td>\n",
       "      <td>STRING</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>customers</td>\n",
       "      <td>email</td>\n",
       "      <td>email</td>\n",
       "      <td>STRING</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>customers</td>\n",
       "      <td>date_of_birth</td>\n",
       "      <td>date_of_birth</td>\n",
       "      <td>DATE</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>arun-genai-bb</td>\n",
       "      <td>flight_reservations</td>\n",
       "      <td>customers</td>\n",
       "      <td>created_at</td>\n",
       "      <td>created_at</td>\n",
       "      <td>DATETIME</td>\n",
       "      <td>None</td>\n",
       "      <td>NULL</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    table_catalog         table_schema    table_name           column_name            field_path data_type description collation_name rounding_mode\n",
       "0   arun-genai-bb  flight_reservations  transactions        transaction_id        transaction_id     INT64        None           NULL          None\n",
       "1   arun-genai-bb  flight_reservations  transactions        reservation_id        reservation_id     INT64        None           NULL          None\n",
       "2   arun-genai-bb  flight_reservations  transactions                amount                amount   FLOAT64        None           NULL          None\n",
       "3   arun-genai-bb  flight_reservations  transactions  transaction_datetime  transaction_datetime  DATETIME        None           NULL          None\n",
       "4   arun-genai-bb  flight_reservations  reservations        reservation_id        reservation_id     INT64        None           NULL          None\n",
       "5   arun-genai-bb  flight_reservations  reservations           customer_id           customer_id     INT64        None           NULL          None\n",
       "6   arun-genai-bb  flight_reservations  reservations             flight_id             flight_id     INT64        None           NULL          None\n",
       "7   arun-genai-bb  flight_reservations  reservations  reservation_datetime  reservation_datetime  DATETIME        None           NULL          None\n",
       "8   arun-genai-bb  flight_reservations  reservations                status                status    STRING        None           NULL          None\n",
       "9   arun-genai-bb  flight_reservations       flights             flight_id             flight_id     INT64        None           NULL          None\n",
       "10  arun-genai-bb  flight_reservations       flights                origin                origin    STRING        None           NULL          None\n",
       "11  arun-genai-bb  flight_reservations       flights           destination           destination    STRING        None           NULL          None\n",
       "12  arun-genai-bb  flight_reservations       flights    departure_datetime    departure_datetime  DATETIME        None           NULL          None\n",
       "13  arun-genai-bb  flight_reservations       flights      arrival_datetime      arrival_datetime  DATETIME        None           NULL          None\n",
       "14  arun-genai-bb  flight_reservations       flights               carrier               carrier    STRING        None           NULL          None\n",
       "15  arun-genai-bb  flight_reservations       flights                 price                 price   FLOAT64        None           NULL          None\n",
       "16  arun-genai-bb  flight_reservations     customers           customer_id           customer_id     INT64        None           NULL          None\n",
       "17  arun-genai-bb  flight_reservations     customers            first_name            first_name    STRING        None           NULL          None\n",
       "18  arun-genai-bb  flight_reservations     customers             last_name             last_name    STRING        None           NULL          None\n",
       "19  arun-genai-bb  flight_reservations     customers                 email                 email    STRING        None           NULL          None\n",
       "20  arun-genai-bb  flight_reservations     customers         date_of_birth         date_of_birth      DATE        None           NULL          None\n",
       "21  arun-genai-bb  flight_reservations     customers            created_at            created_at  DATETIME        None           NULL          None"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "schema_columns = bq_client.query(query=query).to_dataframe()\n",
    "schema_columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "| table_catalog   | table_schema        | table_name   | column_name          | field_path           | data_type   | description   | collation_name   | rounding_mode   |\n",
      "|:----------------|:--------------------|:-------------|:---------------------|:---------------------|:------------|:--------------|:-----------------|:----------------|\n",
      "| arun-genai-bb   | flight_reservations | transactions | transaction_id       | transaction_id       | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | transactions | reservation_id       | reservation_id       | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | transactions | amount               | amount               | FLOAT64     |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | transactions | transaction_datetime | transaction_datetime | DATETIME    |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | reservation_id       | reservation_id       | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | customer_id          | customer_id          | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | flight_id            | flight_id            | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | reservation_datetime | reservation_datetime | DATETIME    |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | status               | status               | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | flight_id            | flight_id            | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | origin               | origin               | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | destination          | destination          | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | departure_datetime   | departure_datetime   | DATETIME    |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | arrival_datetime     | arrival_datetime     | DATETIME    |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | carrier              | carrier              | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | price                | price                | FLOAT64     |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | customer_id          | customer_id          | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | first_name           | first_name           | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | last_name            | last_name            | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | email                | email                | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | date_of_birth        | date_of_birth        | DATE        |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | created_at           | created_at           | DATETIME    |               | NULL             |                 |\n"
     ]
    }
   ],
   "source": [
    "schema_columns = schema_columns.to_markdown(index=False)\n",
    "logger.info(schema_columns)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Utility function to convert text into SQL and automatically correct the SQL query if any execution errors occur. <br>\n",
    "The function also collects the successful query executions, ranks them by latency of execution, and returns the fastest. <br>\n",
    "Additionally, it provides an option to retrieve all the successful queries and their latencies as a dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_and_execute_sql(prompt, max_tries=5, return_all=False):\n",
    "    \"\"\"\n",
    "    Generate an SQL query using the code_gen_model, execute it using bq_client, and rank successful queries by latency.\n",
    "    \n",
    "    Args:\n",
    "    - prompt (str): Prompt to provide to the model for generating SQL.\n",
    "    - max_tries (int): Maximum number of attempts to generate and execute SQL.\n",
    "    - return_all (bool): Flag to determine whether to return all successful queries or only the fastest.\n",
    "    \n",
    "    Returns:\n",
    "    - dict: A dictionary containing the fastest dataframe or all successful dataframes, or error messages and prompt evolution.\n",
    "    \"\"\"\n",
    "    \n",
    "    tries = 0\n",
    "    error_messages = []\n",
    "    prompts = [prompt]\n",
    "    successful_queries = []\n",
    "    \n",
    "    while tries < max_tries:\n",
    "        logger.info(f'TRIAL: {tries+1}')\n",
    "        try:\n",
    "            # Predict SQL using the model\n",
    "            start_time = time.time()\n",
    "            response = code_gen_model.predict(prompt, temperature=TEMPERATURE, max_output_tokens=MAX_OUTPUT_TOKENS)\n",
    "            generated_sql_query = response.text\n",
    "            generated_sql_query = '\\n'.join(generated_sql_query.split('\\n')[1:-1])\n",
    "            logger.info('-' * 50)\n",
    "            logger.info(generated_sql_query)\n",
    "            logger.info('-' * 50)\n",
    "            # Execute SQL using BigQuery client\n",
    "            df = bq_client.query(generated_sql_query).to_dataframe()\n",
    "            latency = time.time() - start_time\n",
    "            successful_queries.append({\n",
    "                \"query\": generated_sql_query,\n",
    "                \"dataframe\": df,\n",
    "                \"latency\": latency\n",
    "            })\n",
    "            logger.info('SUCCEEDED')\n",
    "            # Evolve the prompt for success path to optimize the last successful query for latency\n",
    "            if len(successful_queries) > 1:\n",
    "                prompt = f\"\"\"Modify the last successful SQL query by making changes to it and optimizing it for latency. \n",
    "            ENSURE that the NEW QUERY is DIFFERENT from the previous one while prioritizing faster execution. \n",
    "            The last successful query was:\n",
    "            {successful_queries[-1][\"query\"]}\"\"\"\n",
    "        except Exception as e:\n",
    "            logger.error('FAILED')\n",
    "            # Catch the error, store the message, and try again\n",
    "            msg = str(e)\n",
    "            error_messages.append(msg)\n",
    "            # Evolve the prompt by appending the error message and asking the model to correct it\n",
    "            prompt = f\"\"\"{prompt}\n",
    "Encountered an error: {msg}. \n",
    "To address this, please generate an alternative SQL query response that avoids this specific error. \n",
    "Follow the instructions mentioned above to remediate the error. \n",
    "\n",
    "Modify the below SQL query to resolve the issue and ensure it is not a repetition of the previously generated query.\n",
    "{generated_sql_query}\n",
    " \n",
    "Ensure the revised SQL query aligns precisely with the requirements outlined in the initial question.\n",
    "Additionally, please optimize the query for latency while maintaining correctness and efficiency.\"\"\"\n",
    "            prompts.append(prompt)\n",
    "        logger.info('=' * 100)\n",
    "        tries += 1\n",
    "    # If no successful queries\n",
    "    if len(successful_queries) == 0:\n",
    "        return {\n",
    "            \"error\": \"All attempts exhausted.\",\n",
    "            \"prompts\": prompts,\n",
    "            \"errors\": error_messages\n",
    "        }\n",
    "    \n",
    "    # Sort successful queries by latency\n",
    "    successful_queries.sort(key=lambda x: x['latency'])\n",
    "    \n",
    "    if return_all:\n",
    "        df = pd.DataFrame([(q[\"query\"], q[\"dataframe\"], q[\"latency\"]) for q in successful_queries], columns=[\"Query\", \"Result\", \"Latency\"])\n",
    "        return {\n",
    "            \"dataframe\": df\n",
    "        }\n",
    "    else:\n",
    "        return {\n",
    "            \"fastest_query\": successful_queries[0][\"query\"],\n",
    "            \"result\": successful_queries[0][\"dataframe\"],\n",
    "            \"latency\": successful_queries[0][\"latency\"]\n",
    "        }"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Test text to SQL scenarios"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Construct the SEED prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Please craft a SQL query for BigQuery that addresses the following QUESTION provided below. \n",
      "Ensure you reference the appropriate BigQuery tables and column names provided in the SCHEMA below. \n",
      "When joining tables, employ type coercion to guarantee data type consistency for the join columns. \n",
      "Additionally, the output column names should specify units where applicable.\n",
      "\n",
      "QUESTION:\n",
      "{}\n",
      "\n",
      "SCHEMA:\n",
      "{}\n",
      "\n",
      "IMPORTANT: \n",
      "Use ONLY DATETIME and DO NOT use TIMESTAMP.\n",
      "--\n",
      "Ensure your SQL query accurately defines both the start and end of the DATETIME range.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "seed_prompt = \"\"\"\n",
    "Please craft a SQL query for BigQuery that addresses the following QUESTION provided below. \n",
    "Ensure you reference the appropriate BigQuery tables and column names provided in the SCHEMA below. \n",
    "When joining tables, employ type coercion to guarantee data type consistency for the join columns. \n",
    "Additionally, the output column names should specify units where applicable.\\n\n",
    "QUESTION:\n",
    "{}\\n\n",
    "SCHEMA:\n",
    "{}\\n\n",
    "IMPORTANT: \n",
    "Use ONLY DATETIME and DO NOT use TIMESTAMP.\n",
    "--\n",
    "Ensure your SQL query accurately defines both the start and end of the DATETIME range.\n",
    "\"\"\"\n",
    "logger.info(seed_prompt)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Scenario 1: Retrieve Active Reservations for a Specific Date Range"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For this scenario, you want to find all active reservations within a specific date range."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"Provide a list of all flight reservations from October 10th to October 15th, 2023\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Please craft a SQL query for BigQuery that addresses the following QUESTION provided below. \n",
      "Ensure you reference the appropriate BigQuery tables and column names provided in the SCHEMA below. \n",
      "When joining tables, employ type coercion to guarantee data type consistency for the join columns. \n",
      "Additionally, the output column names should specify units where applicable.\n",
      "\n",
      "QUESTION:\n",
      "Provide a list of all flight reservations from October 10th to October 15th, 2023\n",
      "\n",
      "SCHEMA:\n",
      "| table_catalog   | table_schema        | table_name   | column_name          | field_path           | data_type   | description   | collation_name   | rounding_mode   |\n",
      "|:----------------|:--------------------|:-------------|:---------------------|:---------------------|:------------|:--------------|:-----------------|:----------------|\n",
      "| arun-genai-bb   | flight_reservations | transactions | transaction_id       | transaction_id       | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | transactions | reservation_id       | reservation_id       | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | transactions | amount               | amount               | FLOAT64     |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | transactions | transaction_datetime | transaction_datetime | DATETIME    |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | reservation_id       | reservation_id       | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | customer_id          | customer_id          | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | flight_id            | flight_id            | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | reservation_datetime | reservation_datetime | DATETIME    |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | reservations | status               | status               | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | flight_id            | flight_id            | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | origin               | origin               | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | destination          | destination          | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | departure_datetime   | departure_datetime   | DATETIME    |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | arrival_datetime     | arrival_datetime     | DATETIME    |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | carrier              | carrier              | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | flights      | price                | price                | FLOAT64     |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | customer_id          | customer_id          | INT64       |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | first_name           | first_name           | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | last_name            | last_name            | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | email                | email                | STRING      |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | date_of_birth        | date_of_birth        | DATE        |               | NULL             |                 |\n",
      "| arun-genai-bb   | flight_reservations | customers    | created_at           | created_at           | DATETIME    |               | NULL             |                 |\n",
      "\n",
      "IMPORTANT: \n",
      "Use ONLY DATETIME and DO NOT use TIMESTAMP.\n",
      "--\n",
      "Ensure your SQL query accurately defines both the start and end of the DATETIME range.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "prompt = seed_prompt.format(question, schema_columns)\n",
    "logger.info(prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "TRIAL: 1\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  r.reservation_id,\n",
      "  r.customer_id,\n",
      "  r.flight_id,\n",
      "  r.reservation_datetime,\n",
      "  r.status,\n",
      "  f.origin,\n",
      "  f.destination,\n",
      "  f.departure_datetime,\n",
      "  f.arrival_datetime,\n",
      "  f.carrier,\n",
      "  f.price\n",
      "FROM arun-genai-bb.flight_reservations.reservations r\n",
      "JOIN arun-genai-bb.flight_reservations.flights f\n",
      "ON CAST(r.flight_id AS STRING) = CAST(f.flight_id AS STRING)\n",
      "WHERE r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 2\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  r.reservation_id,\n",
      "  r.customer_id,\n",
      "  r.flight_id,\n",
      "  r.reservation_datetime,\n",
      "  r.status,\n",
      "  f.origin,\n",
      "  f.destination,\n",
      "  f.departure_datetime,\n",
      "  f.arrival_datetime,\n",
      "  f.carrier,\n",
      "  f.price\n",
      "FROM\n",
      "  arun-genai-bb.flight_reservations.reservations AS r\n",
      "JOIN\n",
      "  arun-genai-bb.flight_reservations.flights AS f\n",
      "ON\n",
      "  r.flight_id = f.flight_id\n",
      "WHERE\n",
      "  r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'\n",
      "ORDER BY\n",
      "  r.reservation_datetime;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 3\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  r.reservation_id,\n",
      "  r.customer_id,\n",
      "  r.flight_id,\n",
      "  r.reservation_datetime,\n",
      "  r.status,\n",
      "  f.origin,\n",
      "  f.destination,\n",
      "  f.departure_datetime,\n",
      "  f.arrival_datetime,\n",
      "  f.carrier,\n",
      "  f.price\n",
      "FROM\n",
      "  arun-genai-bb.flight_reservations.reservations AS r\n",
      "JOIN\n",
      "  arun-genai-bb.flight_reservations.flights AS f\n",
      "ON\n",
      "  r.flight_id = f.flight_id\n",
      "WHERE\n",
      "  r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'\n",
      "ORDER BY\n",
      "  r.reservation_datetime\n",
      "LIMIT 100;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 4\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  r.reservation_id,\n",
      "  r.customer_id,\n",
      "  r.flight_id,\n",
      "  r.reservation_datetime,\n",
      "  r.status,\n",
      "  f.origin,\n",
      "  f.destination,\n",
      "  f.departure_datetime,\n",
      "  f.arrival_datetime,\n",
      "  f.carrier,\n",
      "  f.price\n",
      "FROM\n",
      "  arun-genai-bb.flight_reservations.reservations AS r\n",
      "JOIN\n",
      "  arun-genai-bb.flight_reservations.flights AS f\n",
      "ON\n",
      "  r.flight_id = f.flight_id\n",
      "WHERE\n",
      "  r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'\n",
      "ORDER BY\n",
      "  r.reservation_datetime\n",
      "LIMIT 100\n",
      "USE INDEX (reservation_datetime);\n",
      "--------------------------------------------------\n",
      "FAILED\n",
      "====================================================================================================\n",
      "TRIAL: 5\n",
      "--------------------------------------------------\n",
      "SELECT /*+ USE_NL(r) USE_NL(f) */\n",
      "  r.reservation_id,\n",
      "  r.customer_id,\n",
      "  r.flight_id,\n",
      "  r.reservation_datetime,\n",
      "  r.status,\n",
      "  f.origin,\n",
      "  f.destination,\n",
      "  f.departure_datetime,\n",
      "  f.arrival_datetime,\n",
      "  f.carrier,\n",
      "  f.price\n",
      "FROM\n",
      "  arun-genai-bb.flight_reservations.reservations AS r\n",
      "JOIN\n",
      "  arun-genai-bb.flight_reservations.flights AS f\n",
      "ON\n",
      "  r.flight_id = f.flight_id\n",
      "WHERE\n",
      "  r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'\n",
      "ORDER BY\n",
      "  r.reservation_datetime\n",
      "LIMIT 100;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 332 ms, sys: 65 ms, total: 397 ms\n",
      "Wall time: 45.5 s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Query</th>\n",
       "      <th>Result</th>\n",
       "      <th>Latency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>SELECT /*+ USE_NL(r) USE_NL(f) */\\n  r.reservation_id,\\n  r.customer_id,\\n  r.flight_id,\\n  r.reservation_datetime,\\n  r.status,\\n  f.origin,\\n  f.destination,\\n  f.departure_datetime,\\n  f.arrival_datetime,\\n  f.carrier,\\n  f.price\\nFROM\\n  arun-genai-bb.flight_reservations.reservations AS r\\nJOIN\\n  arun-genai-bb.flight_reservations.flights AS f\\nON\\n  r.flight_id = f.flight_id\\nWHERE\\n  r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'\\nORDER BY\\n  r.reservation_datetime\\nLIMIT 100;</td>\n",
       "      <td>reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0</td>\n",
       "      <td>7.883168</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SELECT\\n  r.reservation_id,\\n  r.customer_id,\\n  r.flight_id,\\n  r.reservation_datetime,\\n  r.status,\\n  f.origin,\\n  f.destination,\\n  f.departure_datetime,\\n  f.arrival_datetime,\\n  f.carrier,\\n  f.price\\nFROM arun-genai-bb.flight_reservations.reservations r\\nJOIN arun-genai-bb.flight_reservations.flights f\\nON CAST(r.flight_id AS STRING) = CAST(f.flight_id AS STRING)\\nWHERE r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'</td>\n",
       "      <td>reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0</td>\n",
       "      <td>8.936372</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SELECT\\n  r.reservation_id,\\n  r.customer_id,\\n  r.flight_id,\\n  r.reservation_datetime,\\n  r.status,\\n  f.origin,\\n  f.destination,\\n  f.departure_datetime,\\n  f.arrival_datetime,\\n  f.carrier,\\n  f.price\\nFROM\\n  arun-genai-bb.flight_reservations.reservations AS r\\nJOIN\\n  arun-genai-bb.flight_reservations.flights AS f\\nON\\n  r.flight_id = f.flight_id\\nWHERE\\n  r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'\\nORDER BY\\n  r.reservation_datetime\\nLIMIT 100;</td>\n",
       "      <td>reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0</td>\n",
       "      <td>11.076487</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>SELECT\\n  r.reservation_id,\\n  r.customer_id,\\n  r.flight_id,\\n  r.reservation_datetime,\\n  r.status,\\n  f.origin,\\n  f.destination,\\n  f.departure_datetime,\\n  f.arrival_datetime,\\n  f.carrier,\\n  f.price\\nFROM\\n  arun-genai-bb.flight_reservations.reservations AS r\\nJOIN\\n  arun-genai-bb.flight_reservations.flights AS f\\nON\\n  r.flight_id = f.flight_id\\nWHERE\\n  r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'\\nORDER BY\\n  r.reservation_datetime;</td>\n",
       "      <td>reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0</td>\n",
       "      <td>12.221815</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     Query                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   Result    Latency\n",
       "0  SELECT /*+ USE_NL(r) USE_NL(f) */\\n  r.reservation_id,\\n  r.customer_id,\\n  r.flight_id,\\n  r.reservation_datetime,\\n  r.status,\\n  f.origin,\\n  f.destination,\\n  f.departure_datetime,\\n  f.arrival_datetime,\\n  f.carrier,\\n  f.price\\nFROM\\n  arun-genai-bb.flight_reservations.reservations AS r\\nJOIN\\n  arun-genai-bb.flight_reservations.flights AS f\\nON\\n  r.flight_id = f.flight_id\\nWHERE\\n  r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'\\nORDER BY\\n  r.reservation_datetime\\nLIMIT 100;     reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0   7.883168\n",
       "1                                                               SELECT\\n  r.reservation_id,\\n  r.customer_id,\\n  r.flight_id,\\n  r.reservation_datetime,\\n  r.status,\\n  f.origin,\\n  f.destination,\\n  f.departure_datetime,\\n  f.arrival_datetime,\\n  f.carrier,\\n  f.price\\nFROM arun-genai-bb.flight_reservations.reservations r\\nJOIN arun-genai-bb.flight_reservations.flights f\\nON CAST(r.flight_id AS STRING) = CAST(f.flight_id AS STRING)\\nWHERE r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'     reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0   8.936372\n",
       "2                             SELECT\\n  r.reservation_id,\\n  r.customer_id,\\n  r.flight_id,\\n  r.reservation_datetime,\\n  r.status,\\n  f.origin,\\n  f.destination,\\n  f.departure_datetime,\\n  f.arrival_datetime,\\n  f.carrier,\\n  f.price\\nFROM\\n  arun-genai-bb.flight_reservations.reservations AS r\\nJOIN\\n  arun-genai-bb.flight_reservations.flights AS f\\nON\\n  r.flight_id = f.flight_id\\nWHERE\\n  r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'\\nORDER BY\\n  r.reservation_datetime\\nLIMIT 100;     reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0  11.076487\n",
       "3                                        SELECT\\n  r.reservation_id,\\n  r.customer_id,\\n  r.flight_id,\\n  r.reservation_datetime,\\n  r.status,\\n  f.origin,\\n  f.destination,\\n  f.departure_datetime,\\n  f.arrival_datetime,\\n  f.carrier,\\n  f.price\\nFROM\\n  arun-genai-bb.flight_reservations.reservations AS r\\nJOIN\\n  arun-genai-bb.flight_reservations.flights AS f\\nON\\n  r.flight_id = f.flight_id\\nWHERE\\n  r.reservation_datetime BETWEEN '2023-10-10 00:00:00' AND '2023-10-15 23:59:59'\\nORDER BY\\n  r.reservation_datetime;     reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0  12.221815"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "response = generate_and_execute_sql(prompt=prompt, return_all=True)\n",
    "sql_output = response['dataframe']\n",
    "sql_output\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>reservation_id</th>\n",
       "      <th>customer_id</th>\n",
       "      <th>flight_id</th>\n",
       "      <th>reservation_datetime</th>\n",
       "      <th>status</th>\n",
       "      <th>origin</th>\n",
       "      <th>destination</th>\n",
       "      <th>departure_datetime</th>\n",
       "      <th>arrival_datetime</th>\n",
       "      <th>carrier</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>2023-10-10 10:00:00</td>\n",
       "      <td>Confirmed</td>\n",
       "      <td>SEA</td>\n",
       "      <td>JFK</td>\n",
       "      <td>2023-11-25 06:00:00</td>\n",
       "      <td>2023-11-25 14:30:00</td>\n",
       "      <td>United</td>\n",
       "      <td>550.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>2023-10-12 11:30:00</td>\n",
       "      <td>Confirmed</td>\n",
       "      <td>JFK</td>\n",
       "      <td>MIA</td>\n",
       "      <td>2023-11-27 20:00:00</td>\n",
       "      <td>2023-11-27 23:30:00</td>\n",
       "      <td>American</td>\n",
       "      <td>380.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8</td>\n",
       "      <td>8</td>\n",
       "      <td>8</td>\n",
       "      <td>2023-10-15 13:20:00</td>\n",
       "      <td>Confirmed</td>\n",
       "      <td>MIA</td>\n",
       "      <td>JFK</td>\n",
       "      <td>2023-11-30 10:00:00</td>\n",
       "      <td>2023-11-30 13:30:00</td>\n",
       "      <td>American</td>\n",
       "      <td>380.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   reservation_id  customer_id  flight_id reservation_datetime     status origin destination  departure_datetime    arrival_datetime   carrier  price\n",
       "0               6            6          6  2023-10-10 10:00:00  Confirmed    SEA         JFK 2023-11-25 06:00:00 2023-11-25 14:30:00    United  550.0\n",
       "1               7            6          7  2023-10-12 11:30:00  Confirmed    JFK         MIA 2023-11-27 20:00:00 2023-11-27 23:30:00  American  380.0\n",
       "2               8            8          8  2023-10-15 13:20:00  Confirmed    MIA         JFK 2023-11-30 10:00:00 2023-11-30 13:30:00  American  380.0"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_df = sql_output.loc[0, 'Result']\n",
    "result_df\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Scenario 2: Identify customers who made reservations in the past N days."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"Identify all customers who have made flight reservations within the last 7 days.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = seed_prompt.format(question, schema_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "TRIAL: 1\n",
      "--------------------------------------------------\n",
      "SELECT c.customer_id,\n",
      "       c.first_name,\n",
      "       c.last_name,\n",
      "       c.email\n",
      "FROM flight_reservations.customers c\n",
      "JOIN flight_reservations.reservations r\n",
      "ON c.customer_id = r.customer_id\n",
      "WHERE r.reservation_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND CURRENT_DATETIME();\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 2\n",
      "--------------------------------------------------\n",
      "SELECT c.customer_id,\n",
      "       c.first_name,\n",
      "       c.last_name,\n",
      "       c.email,\n",
      "       t.transaction_datetime AS reservation_datetime\n",
      "FROM flight_reservations.transactions t\n",
      "JOIN flight_reservations.reservations r\n",
      "ON t.reservation_id = r.reservation_id\n",
      "JOIN flight_reservations.customers c\n",
      "ON r.customer_id = c.customer_id\n",
      "WHERE t.transaction_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND CURRENT_DATETIME();\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 3\n",
      "--------------------------------------------------\n",
      "SELECT c.customer_id,\n",
      "       c.first_name,\n",
      "       c.last_name,\n",
      "       c.email,\n",
      "       t.transaction_datetime AS reservation_datetime\n",
      "FROM flight_reservations.transactions t\n",
      "JOIN flight_reservations.reservations r ON (t.reservation_id = r.reservation_id)\n",
      "JOIN flight_reservations.customers c ON (r.customer_id = c.customer_id)\n",
      "WHERE t.transaction_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND CURRENT_DATETIME()\n",
      "ORDER BY t.transaction_datetime DESC\n",
      "LIMIT 10;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 4\n",
      "--------------------------------------------------\n",
      "SELECT c.customer_id,\n",
      "       c.first_name,\n",
      "       c.last_name,\n",
      "       c.email,\n",
      "       t.transaction_datetime AS reservation_datetime\n",
      "FROM flight_reservations.transactions t\n",
      "JOIN flight_reservations.reservations r ON (t.reservation_id = r.reservation_id)\n",
      "JOIN flight_reservations.customers c ON (r.customer_id = c.customer_id)\n",
      "WHERE t.transaction_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND CURRENT_DATETIME()\n",
      "ORDER BY t.transaction_datetime DESC\n",
      "LIMIT 10;\n",
      "\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 5\n",
      "--------------------------------------------------\n",
      "SELECT c.customer_id,\n",
      "       c.first_name,\n",
      "       c.last_name,\n",
      "       c.email,\n",
      "       t.transaction_datetime AS reservation_datetime\n",
      "FROM flight_reservations.transactions t\n",
      "JOIN flight_reservations.reservations r ON (t.reservation_id = r.reservation_id)\n",
      "JOIN flight_reservations.customers c ON (r.customer_id = c.customer_id)\n",
      "WHERE t.transaction_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND CURRENT_DATETIME()\n",
      "ORDER BY t.transaction_datetime DESC\n",
      "LIMIT 10;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 305 ms, sys: 64.9 ms, total: 370 ms\n",
      "Wall time: 1min 2s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Query</th>\n",
       "      <th>Result</th>\n",
       "      <th>Latency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>SELECT c.customer_id,\\n       c.first_name,\\n       c.last_name,\\n       c.email\\nFROM flight_reservations.customers c\\nJOIN flight_reservations.reservations r\\nON c.customer_id = r.customer_id\\nWHERE r.reservation_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND CURRENT_DATETIME();</td>\n",
       "      <td>customer_id first_name    last_name               email\n",
       "0           11        Ian  Somerhalder   ian.s@example.com\n",
       "1           13       Kate      Winslet  kate.w@example.com\n",
       "2           13       Kate      Winslet  kate.w@example.com</td>\n",
       "      <td>6.866242</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SELECT c.customer_id,\\n       c.first_name,\\n       c.last_name,\\n       c.email,\\n       t.transaction_datetime AS reservation_datetime\\nFROM flight_reservations.transactions t\\nJOIN flight_reservations.reservations r\\nON t.reservation_id = r.reservation_id\\nJOIN flight_reservations.customers c\\nON r.customer_id = c.customer_id\\nWHERE t.transaction_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND CURRENT_DATETIME();</td>\n",
       "      <td>customer_id first_name    last_name               email reservation_datetime\n",
       "0           11        Ian  Somerhalder   ian.s@example.com  2023-10-28 17:11:00\n",
       "1           13       Kate      Winslet  kate.w@example.com  2023-11-02 08:21:00</td>\n",
       "      <td>7.724321</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SELECT c.customer_id,\\n       c.first_name,\\n       c.last_name,\\n       c.email,\\n       t.transaction_datetime AS reservation_datetime\\nFROM flight_reservations.transactions t\\nJOIN flight_reservations.reservations r ON (t.reservation_id = r.reservation_id)\\nJOIN flight_reservations.customers c ON (r.customer_id = c.customer_id)\\nWHERE t.transaction_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND CURRENT_DATETIME()\\nORDER BY t.transaction_datetime DESC\\nLIMIT 10;</td>\n",
       "      <td>customer_id first_name    last_name               email reservation_datetime\n",
       "0           13       Kate      Winslet  kate.w@example.com  2023-11-02 08:21:00\n",
       "1           11        Ian  Somerhalder   ian.s@example.com  2023-10-28 17:11:00</td>\n",
       "      <td>13.303155</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>SELECT c.customer_id,\\n       c.first_name,\\n       c.last_name,\\n       c.email,\\n       t.transaction_datetime AS reservation_datetime\\nFROM flight_reservations.transactions t\\nJOIN flight_reservations.reservations r ON (t.reservation_id = r.reservation_id)\\nJOIN flight_reservations.customers c ON (r.customer_id = c.customer_id)\\nWHERE t.transaction_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND CURRENT_DATETIME()\\nORDER BY t.transaction_datetime DESC\\nLIMIT 10;\\n</td>\n",
       "      <td>customer_id first_name    last_name               email reservation_datetime\n",
       "0           13       Kate      Winslet  kate.w@example.com  2023-11-02 08:21:00\n",
       "1           11        Ian  Somerhalder   ian.s@example.com  2023-10-28 17:11:00</td>\n",
       "      <td>16.455699</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>SELECT c.customer_id,\\n       c.first_name,\\n       c.last_name,\\n       c.email,\\n       t.transaction_datetime AS reservation_datetime\\nFROM flight_reservations.transactions t\\nJOIN flight_reservations.reservations r ON (t.reservation_id = r.reservation_id)\\nJOIN flight_reservations.customers c ON (r.customer_id = c.customer_id)\\nWHERE t.transaction_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND CURRENT_DATETIME()\\nORDER BY t.transaction_datetime DESC\\nLIMIT 10;</td>\n",
       "      <td>customer_id first_name    last_name               email reservation_datetime\n",
       "0           13       Kate      Winslet  kate.w@example.com  2023-11-02 08:21:00\n",
       "1           11        Ian  Somerhalder   ian.s@example.com  2023-10-28 17:11:00</td>\n",
       "      <td>17.874523</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       Query                                                                                                                                                                                                                                           Result    Latency\n",
       "0                                                                                                                                                                                               SELECT c.customer_id,\\n       c.first_name,\\n       c.last_name,\\n       c.email\\nFROM flight_reservations.customers c\\nJOIN flight_reservations.reservations r\\nON c.customer_id = r.customer_id\\nWHERE r.reservation_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND CURRENT_DATETIME();         customer_id first_name    last_name               email\n",
       "0           11        Ian  Somerhalder   ian.s@example.com\n",
       "1           13       Kate      Winslet  kate.w@example.com\n",
       "2           13       Kate      Winslet  kate.w@example.com   6.866242\n",
       "1                                                      SELECT c.customer_id,\\n       c.first_name,\\n       c.last_name,\\n       c.email,\\n       t.transaction_datetime AS reservation_datetime\\nFROM flight_reservations.transactions t\\nJOIN flight_reservations.reservations r\\nON t.reservation_id = r.reservation_id\\nJOIN flight_reservations.customers c\\nON r.customer_id = c.customer_id\\nWHERE t.transaction_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND CURRENT_DATETIME();     customer_id first_name    last_name               email reservation_datetime\n",
       "0           11        Ian  Somerhalder   ian.s@example.com  2023-10-28 17:11:00\n",
       "1           13       Kate      Winslet  kate.w@example.com  2023-11-02 08:21:00   7.724321\n",
       "2    SELECT c.customer_id,\\n       c.first_name,\\n       c.last_name,\\n       c.email,\\n       t.transaction_datetime AS reservation_datetime\\nFROM flight_reservations.transactions t\\nJOIN flight_reservations.reservations r ON (t.reservation_id = r.reservation_id)\\nJOIN flight_reservations.customers c ON (r.customer_id = c.customer_id)\\nWHERE t.transaction_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND CURRENT_DATETIME()\\nORDER BY t.transaction_datetime DESC\\nLIMIT 10;     customer_id first_name    last_name               email reservation_datetime\n",
       "0           13       Kate      Winslet  kate.w@example.com  2023-11-02 08:21:00\n",
       "1           11        Ian  Somerhalder   ian.s@example.com  2023-10-28 17:11:00  13.303155\n",
       "3  SELECT c.customer_id,\\n       c.first_name,\\n       c.last_name,\\n       c.email,\\n       t.transaction_datetime AS reservation_datetime\\nFROM flight_reservations.transactions t\\nJOIN flight_reservations.reservations r ON (t.reservation_id = r.reservation_id)\\nJOIN flight_reservations.customers c ON (r.customer_id = c.customer_id)\\nWHERE t.transaction_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND CURRENT_DATETIME()\\nORDER BY t.transaction_datetime DESC\\nLIMIT 10;\\n     customer_id first_name    last_name               email reservation_datetime\n",
       "0           13       Kate      Winslet  kate.w@example.com  2023-11-02 08:21:00\n",
       "1           11        Ian  Somerhalder   ian.s@example.com  2023-10-28 17:11:00  16.455699\n",
       "4    SELECT c.customer_id,\\n       c.first_name,\\n       c.last_name,\\n       c.email,\\n       t.transaction_datetime AS reservation_datetime\\nFROM flight_reservations.transactions t\\nJOIN flight_reservations.reservations r ON (t.reservation_id = r.reservation_id)\\nJOIN flight_reservations.customers c ON (r.customer_id = c.customer_id)\\nWHERE t.transaction_datetime BETWEEN DATE_SUB(CURRENT_DATETIME(), INTERVAL 7 DAY) AND CURRENT_DATETIME()\\nORDER BY t.transaction_datetime DESC\\nLIMIT 10;     customer_id first_name    last_name               email reservation_datetime\n",
       "0           13       Kate      Winslet  kate.w@example.com  2023-11-02 08:21:00\n",
       "1           11        Ian  Somerhalder   ian.s@example.com  2023-10-28 17:11:00  17.874523"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "response = generate_and_execute_sql(prompt=prompt, return_all=True)\n",
    "sql_output = response['dataframe']\n",
    "sql_output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>first_name</th>\n",
       "      <th>last_name</th>\n",
       "      <th>email</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>11</td>\n",
       "      <td>Ian</td>\n",
       "      <td>Somerhalder</td>\n",
       "      <td>ian.s@example.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>13</td>\n",
       "      <td>Kate</td>\n",
       "      <td>Winslet</td>\n",
       "      <td>kate.w@example.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>13</td>\n",
       "      <td>Kate</td>\n",
       "      <td>Winslet</td>\n",
       "      <td>kate.w@example.com</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id first_name    last_name               email\n",
       "0           11        Ian  Somerhalder   ian.s@example.com\n",
       "1           13       Kate      Winslet  kate.w@example.com\n",
       "2           13       Kate      Winslet  kate.w@example.com"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_df = sql_output.loc[0, 'Result']\n",
    "result_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Scenario 3: Calculate Monthly Revenue\n",
    "Calculate the total revenue generated from transactions for a given month and year."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"Calculate the total revenue generated from transactions in October 2023, specifically from all reservations with a Confirmed status.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = seed_prompt.format(question, schema_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "TRIAL: 1\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  SUM(transactions.amount) AS total_revenue_usd\n",
      "FROM\n",
      "  arun-genai-bb.flight_reservations.transactions AS transactions\n",
      "JOIN\n",
      "  arun-genai-bb.flight_reservations.reservations AS reservations\n",
      "ON\n",
      "  transactions.reservation_id = reservations.reservation_id\n",
      "WHERE\n",
      "  reservations.status = 'Confirmed'\n",
      "  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59';\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 2\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  SUM(transactions.amount) AS total_revenue_usd\n",
      "FROM\n",
      "  arun-genai-bb.flight_reservations.transactions AS transactions\n",
      "JOIN\n",
      "  arun-genai-bb.flight_reservations.reservations AS reservations\n",
      "ON\n",
      "  transactions.reservation_id = reservations.reservation_id\n",
      "WHERE\n",
      "  reservations.status = 'Confirmed'\n",
      "  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59';\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 3\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  SUM(transactions.amount) AS total_revenue_usd\n",
      "FROM\n",
      "  arun-genai-bb.flight_reservations.transactions AS transactions\n",
      "JOIN\n",
      "  arun-genai-bb.flight_reservations.reservations AS reservations\n",
      "ON\n",
      "  transactions.reservation_id = reservations.reservation_id\n",
      "WHERE\n",
      "  reservations.status = 'Confirmed'\n",
      "  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59'\n",
      "GROUP BY\n",
      "  reservations.reservation_id;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 4\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  SUM(transactions.amount) AS total_revenue_usd\n",
      "FROM\n",
      "  arun-genai-bb.flight_reservations.transactions AS transactions\n",
      "JOIN\n",
      "  arun-genai-bb.flight_reservations.reservations AS reservations\n",
      "ON\n",
      "  transactions.reservation_id = reservations.reservation_id\n",
      "WHERE\n",
      "  reservations.status = 'Confirmed'\n",
      "  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59'\n",
      "GROUP BY\n",
      "  reservations.reservation_id;\n",
      "\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 5\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  SUM(transactions.amount) AS total_revenue_usd\n",
      "FROM\n",
      "  arun-genai-bb.flight_reservations.transactions AS transactions\n",
      "JOIN\n",
      "  arun-genai-bb.flight_reservations.reservations AS reservations\n",
      "ON\n",
      "  transactions.reservation_id = reservations.reservation_id\n",
      "WHERE\n",
      "  reservations.status = 'Confirmed'\n",
      "  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59'\n",
      "GROUP BY\n",
      "  reservations.reservation_id\n",
      "HAVING\n",
      "  SUM(transactions.amount) > 0\n",
      "ORDER BY\n",
      "  total_revenue_usd DESC\n",
      "LIMIT\n",
      "  10;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 303 ms, sys: 74 ms, total: 377 ms\n",
      "Wall time: 1min 17s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Query</th>\n",
       "      <th>Result</th>\n",
       "      <th>Latency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = reservations.reservation_id\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59';</td>\n",
       "      <td>total_revenue_usd\n",
       "0             3860.0</td>\n",
       "      <td>10.283573</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = reservations.reservation_id\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59'\\nGROUP BY\\n  reservations.reservation_id;\\n</td>\n",
       "      <td>total_revenue_usd\n",
       "0              450.0\n",
       "1              450.0\n",
       "2              300.0\n",
       "3              350.0\n",
       "4              550.0\n",
       "5              380.0\n",
       "6              380.0\n",
       "7              200.0\n",
       "8              400.0\n",
       "9              400.0</td>\n",
       "      <td>13.738721</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = reservations.reservation_id\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59'\\nGROUP BY\\n  reservations.reservation_id\\nHAVING\\n  SUM(transactions.amount) &gt; 0\\nORDER BY\\n  total_revenue_usd DESC\\nLIMIT\\n  10;</td>\n",
       "      <td>total_revenue_usd\n",
       "0              550.0\n",
       "1              450.0\n",
       "2              450.0\n",
       "3              400.0\n",
       "4              400.0\n",
       "5              380.0\n",
       "6              380.0\n",
       "7              350.0\n",
       "8              300.0\n",
       "9              200.0</td>\n",
       "      <td>14.285593</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = reservations.reservation_id\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59';</td>\n",
       "      <td>total_revenue_usd\n",
       "0             3860.0</td>\n",
       "      <td>17.388042</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = reservations.reservation_id\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59'\\nGROUP BY\\n  reservations.reservation_id;</td>\n",
       "      <td>total_revenue_usd\n",
       "0              450.0\n",
       "1              450.0\n",
       "2              300.0\n",
       "3              350.0\n",
       "4              550.0\n",
       "5              380.0\n",
       "6              380.0\n",
       "7              200.0\n",
       "8              400.0\n",
       "9              400.0</td>\n",
       "      <td>21.345734</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      Query                                                                                                                                                                                                                                  Result    Latency\n",
       "0                                                                                                                                    SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = reservations.reservation_id\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59';                                                                                                                                                                                                  total_revenue_usd\n",
       "0             3860.0  10.283573\n",
       "1                                                                                         SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = reservations.reservation_id\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59'\\nGROUP BY\\n  reservations.reservation_id;\\n     total_revenue_usd\n",
       "0              450.0\n",
       "1              450.0\n",
       "2              300.0\n",
       "3              350.0\n",
       "4              550.0\n",
       "5              380.0\n",
       "6              380.0\n",
       "7              200.0\n",
       "8              400.0\n",
       "9              400.0  13.738721\n",
       "2  SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = reservations.reservation_id\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59'\\nGROUP BY\\n  reservations.reservation_id\\nHAVING\\n  SUM(transactions.amount) > 0\\nORDER BY\\n  total_revenue_usd DESC\\nLIMIT\\n  10;     total_revenue_usd\n",
       "0              550.0\n",
       "1              450.0\n",
       "2              450.0\n",
       "3              400.0\n",
       "4              400.0\n",
       "5              380.0\n",
       "6              380.0\n",
       "7              350.0\n",
       "8              300.0\n",
       "9              200.0  14.285593\n",
       "3                                                                                                                                    SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = reservations.reservation_id\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59';                                                                                                                                                                                                  total_revenue_usd\n",
       "0             3860.0  17.388042\n",
       "4                                                                                           SELECT\\n  SUM(transactions.amount) AS total_revenue_usd\\nFROM\\n  arun-genai-bb.flight_reservations.transactions AS transactions\\nJOIN\\n  arun-genai-bb.flight_reservations.reservations AS reservations\\nON\\n  transactions.reservation_id = reservations.reservation_id\\nWHERE\\n  reservations.status = 'Confirmed'\\n  AND transactions.transaction_datetime BETWEEN '2023-10-01 00:00:00' AND '2023-10-31 23:59:59'\\nGROUP BY\\n  reservations.reservation_id;     total_revenue_usd\n",
       "0              450.0\n",
       "1              450.0\n",
       "2              300.0\n",
       "3              350.0\n",
       "4              550.0\n",
       "5              380.0\n",
       "6              380.0\n",
       "7              200.0\n",
       "8              400.0\n",
       "9              400.0  21.345734"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "response = generate_and_execute_sql(prompt=prompt, return_all=True)\n",
    "sql_output = response['dataframe']\n",
    "sql_output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_revenue_usd</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3860.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   total_revenue_usd\n",
       "0             3860.0"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_df = sql_output.loc[0, 'Result']\n",
    "result_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Scenario 4: Popular Flight Times\n",
    "Identify the most popular departure hours or days for a given day or month or year."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"Determine the departure months with the highest frequency for the year 2023.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = seed_prompt.format(question, schema_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "TRIAL: 1\n",
      "--------------------------------------------------\n",
      "SELECT DATE_TRUNC(departure_datetime, MONTH) AS departure_month,\n",
      "       COUNT(*) AS flight_count\n",
      "FROM flight_reservations.flights\n",
      "WHERE DATE_TRUNC(departure_datetime, YEAR) = '2023'\n",
      "GROUP BY departure_month\n",
      "ORDER BY flight_count DESC\n",
      "LIMIT 10;\n",
      "--------------------------------------------------\n",
      "FAILED\n",
      "====================================================================================================\n",
      "TRIAL: 2\n",
      "--------------------------------------------------\n",
      "SELECT DATE_TRUNC(departure_datetime, MONTH) AS departure_month,\n",
      "       COUNT(*) AS flight_count\n",
      "FROM flight_reservations.flights\n",
      "WHERE YEAR(departure_datetime) = 2023\n",
      "GROUP BY departure_month\n",
      "ORDER BY flight_count DESC\n",
      "LIMIT 10;\n",
      "--------------------------------------------------\n",
      "FAILED\n",
      "====================================================================================================\n",
      "TRIAL: 3\n",
      "--------------------------------------------------\n",
      "SELECT DATE_TRUNC(departure_datetime, MONTH) AS departure_month,\n",
      "       COUNT(*) AS flight_count\n",
      "FROM flight_reservations.flights\n",
      "WHERE departure_datetime >= '2023-01-01'\n",
      "AND departure_datetime < '2024-01-01'\n",
      "GROUP BY departure_month\n",
      "ORDER BY flight_count DESC\n",
      "LIMIT 10;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 4\n",
      "--------------------------------------------------\n",
      "SELECT DATE_TRUNC(departure_datetime, MONTH) AS departure_month,\n",
      "       COUNT(*) AS flight_count\n",
      "FROM flight_reservations.flights\n",
      "WHERE departure_datetime BETWEEN '2023-01-01' AND '2023-12-31'\n",
      "GROUP BY departure_month\n",
      "ORDER BY flight_count DESC\n",
      "LIMIT 10;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 5\n",
      "--------------------------------------------------\n",
      "SELECT DATE_TRUNC(departure_datetime, MONTH) AS departure_month,\n",
      "       COUNT(*) AS flight_count\n",
      "FROM flight_reservations.flights\n",
      "WHERE departure_datetime BETWEEN '2023-01-01' AND '2023-12-31'\n",
      "GROUP BY departure_month\n",
      "ORDER BY flight_count DESC\n",
      "LIMIT 10;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 212 ms, sys: 58.1 ms, total: 270 ms\n",
      "Wall time: 56.7 s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Query</th>\n",
       "      <th>Result</th>\n",
       "      <th>Latency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>SELECT DATE_TRUNC(departure_datetime, MONTH) AS departure_month,\\n       COUNT(*) AS flight_count\\nFROM flight_reservations.flights\\nWHERE departure_datetime &gt;= '2023-01-01'\\nAND departure_datetime &lt; '2024-01-01'\\nGROUP BY departure_month\\nORDER BY flight_count DESC\\nLIMIT 10;</td>\n",
       "      <td>departure_month  flight_count\n",
       "0      2023-11-01             8\n",
       "1      2023-12-01             8</td>\n",
       "      <td>10.785775</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SELECT DATE_TRUNC(departure_datetime, MONTH) AS departure_month,\\n       COUNT(*) AS flight_count\\nFROM flight_reservations.flights\\nWHERE departure_datetime BETWEEN '2023-01-01' AND '2023-12-31'\\nGROUP BY departure_month\\nORDER BY flight_count DESC\\nLIMIT 10;</td>\n",
       "      <td>departure_month  flight_count\n",
       "0      2023-11-01             8\n",
       "1      2023-12-01             7</td>\n",
       "      <td>12.763651</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SELECT DATE_TRUNC(departure_datetime, MONTH) AS departure_month,\\n       COUNT(*) AS flight_count\\nFROM flight_reservations.flights\\nWHERE departure_datetime BETWEEN '2023-01-01' AND '2023-12-31'\\nGROUP BY departure_month\\nORDER BY flight_count DESC\\nLIMIT 10;</td>\n",
       "      <td>departure_month  flight_count\n",
       "0      2023-11-01             8\n",
       "1      2023-12-01             7</td>\n",
       "      <td>15.225393</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                                                                                                                                                                                                                                   Query                                                                                           Result    Latency\n",
       "0  SELECT DATE_TRUNC(departure_datetime, MONTH) AS departure_month,\\n       COUNT(*) AS flight_count\\nFROM flight_reservations.flights\\nWHERE departure_datetime >= '2023-01-01'\\nAND departure_datetime < '2024-01-01'\\nGROUP BY departure_month\\nORDER BY flight_count DESC\\nLIMIT 10;    departure_month  flight_count\n",
       "0      2023-11-01             8\n",
       "1      2023-12-01             8  10.785775\n",
       "1                   SELECT DATE_TRUNC(departure_datetime, MONTH) AS departure_month,\\n       COUNT(*) AS flight_count\\nFROM flight_reservations.flights\\nWHERE departure_datetime BETWEEN '2023-01-01' AND '2023-12-31'\\nGROUP BY departure_month\\nORDER BY flight_count DESC\\nLIMIT 10;    departure_month  flight_count\n",
       "0      2023-11-01             8\n",
       "1      2023-12-01             7  12.763651\n",
       "2                   SELECT DATE_TRUNC(departure_datetime, MONTH) AS departure_month,\\n       COUNT(*) AS flight_count\\nFROM flight_reservations.flights\\nWHERE departure_datetime BETWEEN '2023-01-01' AND '2023-12-31'\\nGROUP BY departure_month\\nORDER BY flight_count DESC\\nLIMIT 10;    departure_month  flight_count\n",
       "0      2023-11-01             8\n",
       "1      2023-12-01             7  15.225393"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "response = generate_and_execute_sql(prompt=prompt, return_all=True)\n",
    "sql_output = response['dataframe']\n",
    "sql_output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>departure_month</th>\n",
       "      <th>flight_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2023-11-01</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2023-12-01</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  departure_month  flight_count\n",
       "0      2023-11-01             8\n",
       "1      2023-12-01             8"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_df = sql_output.loc[0, 'Result']\n",
    "result_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Scenario 5: Customer Age Group\n",
    "Group customers by age brackets and count the number in each bracket."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"Group customers into five distinct age brackets and count the number of customers in each bracket.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = seed_prompt.format(question, schema_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "TRIAL: 1\n",
      "--------------------------------------------------\n",
      "SELECT \n",
      "  CASE \n",
      "    WHEN c.date_of_birth IS NULL THEN 'Unknown'\n",
      "    WHEN c.date_of_birth >= DATE_SUB(CURRENT_DATE(), INTERVAL 80 YEAR) THEN '80+'\n",
      "    WHEN c.date_of_birth >= DATE_SUB(CURRENT_DATE(), INTERVAL 65 YEAR) THEN '65-79'\n",
      "    WHEN c.date_of_birth >= DATE_SUB(CURRENT_DATE(), INTERVAL 50 YEAR) THEN '50-64'\n",
      "    WHEN c.date_of_birth >= DATE_SUB(CURRENT_DATE(), INTERVAL 35 YEAR) THEN '35-49'\n",
      "    ELSE '0-34'\n",
      "  END AS age_bracket,\n",
      "  COUNT(c.customer_id) AS num_customers\n",
      "FROM flight_reservations.customers c\n",
      "GROUP BY age_bracket;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 2\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  FLOOR(DATEDIFF(CURRENT_DATE(), date_of_birth) / 365) AS age_bracket,\n",
      "  COUNT(*) AS num_customers\n",
      "FROM flight_reservations.customers\n",
      "GROUP BY age_bracket\n",
      "ORDER BY age_bracket;\n",
      "--------------------------------------------------\n",
      "FAILED\n",
      "====================================================================================================\n",
      "TRIAL: 3\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  CASE\n",
      "    WHEN DATEDIFF(CURRENT_DATE(), date_of_birth) < 365 THEN '0-1'\n",
      "    WHEN DATEDIFF(CURRENT_DATE(), date_of_birth) < 730 THEN '1-2'\n",
      "    WHEN DATEDIFF(CURRENT_DATE(), date_of_birth) < 1095 THEN '2-3'\n",
      "    WHEN DATEDIFF(CURRENT_DATE(), date_of_birth) < 1460 THEN '3-4'\n",
      "    ELSE '4+'\n",
      "  END AS age_bracket,\n",
      "  COUNT(*) AS num_customers\n",
      "FROM flight_reservations.customers\n",
      "GROUP BY age_bracket\n",
      "ORDER BY age_bracket;\n",
      "--------------------------------------------------\n",
      "FAILED\n",
      "====================================================================================================\n",
      "TRIAL: 4\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  CASE\n",
      "    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) < 365 THEN '0-1'\n",
      "    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) < 730 THEN '1-2'\n",
      "    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) < 1095 THEN '2-3'\n",
      "    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) < 1460 THEN '3-4'\n",
      "    ELSE '4+'\n",
      "  END AS age_bracket,\n",
      "  COUNT(*) AS num_customers\n",
      "FROM flight_reservations.customers\n",
      "GROUP BY age_bracket\n",
      "ORDER BY age_bracket;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 5\n",
      "--------------------------------------------------\n",
      "SELECT\n",
      "  CASE\n",
      "    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) < 365 THEN '0-1'\n",
      "    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) < 730 THEN '1-2'\n",
      "    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) < 1095 THEN '2-3'\n",
      "    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) < 1460 THEN '3-4'\n",
      "    ELSE '4+'\n",
      "  END AS age_bracket,\n",
      "  COUNT(*) AS num_customers\n",
      "FROM flight_reservations.customers\n",
      "WHERE date_of_birth IS NOT NULL\n",
      "GROUP BY age_bracket\n",
      "ORDER BY age_bracket;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 237 ms, sys: 68.3 ms, total: 306 ms\n",
      "Wall time: 1min 11s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Query</th>\n",
       "      <th>Result</th>\n",
       "      <th>Latency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>SELECT \\n  CASE \\n    WHEN c.date_of_birth IS NULL THEN 'Unknown'\\n    WHEN c.date_of_birth &gt;= DATE_SUB(CURRENT_DATE(), INTERVAL 80 YEAR) THEN '80+'\\n    WHEN c.date_of_birth &gt;= DATE_SUB(CURRENT_DATE(), INTERVAL 65 YEAR) THEN '65-79'\\n    WHEN c.date_of_birth &gt;= DATE_SUB(CURRENT_DATE(), INTERVAL 50 YEAR) THEN '50-64'\\n    WHEN c.date_of_birth &gt;= DATE_SUB(CURRENT_DATE(), INTERVAL 35 YEAR) THEN '35-49'\\n    ELSE '0-34'\\n  END AS age_bracket,\\n  COUNT(c.customer_id) AS num_customers\\nFROM flight_reservations.customers c\\nGROUP BY age_bracket;</td>\n",
       "      <td>age_bracket  num_customers\n",
       "0         80+             19\n",
       "1        0-34              1</td>\n",
       "      <td>15.148550</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SELECT\\n  CASE\\n    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) &lt; 365 THEN '0-1'\\n    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) &lt; 730 THEN '1-2'\\n    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) &lt; 1095 THEN '2-3'\\n    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) &lt; 1460 THEN '3-4'\\n    ELSE '4+'\\n  END AS age_bracket,\\n  COUNT(*) AS num_customers\\nFROM flight_reservations.customers\\nGROUP BY age_bracket\\nORDER BY age_bracket;</td>\n",
       "      <td>age_bracket  num_customers\n",
       "0          4+             20</td>\n",
       "      <td>15.232111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SELECT\\n  CASE\\n    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) &lt; 365 THEN '0-1'\\n    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) &lt; 730 THEN '1-2'\\n    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) &lt; 1095 THEN '2-3'\\n    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) &lt; 1460 THEN '3-4'\\n    ELSE '4+'\\n  END AS age_bracket,\\n  COUNT(*) AS num_customers\\nFROM flight_reservations.customers\\nWHERE date_of_birth IS NOT NULL\\nGROUP BY age_bracket\\nORDER BY age_bracket;</td>\n",
       "      <td>age_bracket  num_customers\n",
       "0          4+             20</td>\n",
       "      <td>15.344399</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               Query                                                                                  Result    Latency\n",
       "0  SELECT \\n  CASE \\n    WHEN c.date_of_birth IS NULL THEN 'Unknown'\\n    WHEN c.date_of_birth >= DATE_SUB(CURRENT_DATE(), INTERVAL 80 YEAR) THEN '80+'\\n    WHEN c.date_of_birth >= DATE_SUB(CURRENT_DATE(), INTERVAL 65 YEAR) THEN '65-79'\\n    WHEN c.date_of_birth >= DATE_SUB(CURRENT_DATE(), INTERVAL 50 YEAR) THEN '50-64'\\n    WHEN c.date_of_birth >= DATE_SUB(CURRENT_DATE(), INTERVAL 35 YEAR) THEN '35-49'\\n    ELSE '0-34'\\n  END AS age_bracket,\\n  COUNT(c.customer_id) AS num_customers\\nFROM flight_reservations.customers c\\nGROUP BY age_bracket;    age_bracket  num_customers\n",
       "0         80+             19\n",
       "1        0-34              1  15.148550\n",
       "1                                                                                           SELECT\\n  CASE\\n    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) < 365 THEN '0-1'\\n    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) < 730 THEN '1-2'\\n    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) < 1095 THEN '2-3'\\n    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) < 1460 THEN '3-4'\\n    ELSE '4+'\\n  END AS age_bracket,\\n  COUNT(*) AS num_customers\\nFROM flight_reservations.customers\\nGROUP BY age_bracket\\nORDER BY age_bracket;                                 age_bracket  num_customers\n",
       "0          4+             20  15.232111\n",
       "2                                                          SELECT\\n  CASE\\n    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) < 365 THEN '0-1'\\n    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) < 730 THEN '1-2'\\n    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) < 1095 THEN '2-3'\\n    WHEN date_diff(CURRENT_DATE(), date_of_birth, DAY) < 1460 THEN '3-4'\\n    ELSE '4+'\\n  END AS age_bracket,\\n  COUNT(*) AS num_customers\\nFROM flight_reservations.customers\\nWHERE date_of_birth IS NOT NULL\\nGROUP BY age_bracket\\nORDER BY age_bracket;                                 age_bracket  num_customers\n",
       "0          4+             20  15.344399"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "response = generate_and_execute_sql(prompt=prompt, return_all=True)\n",
    "sql_output = response['dataframe']\n",
    "sql_output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age_bracket</th>\n",
       "      <th>num_customers</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>80+</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0-34</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  age_bracket  num_customers\n",
       "0         80+             19\n",
       "1        0-34              1"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_df = sql_output.loc[0, 'Result']\n",
    "result_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Scenario 6: Age Calculation\n",
    "Calculate the age of customers based on their date of birth and filter those who are above X years old."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"Identify and rank all customers aged 18+ who have `Confirmed` reservations for the current month, ordered by their age. Make sure to display their ages in the result.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = seed_prompt.format(question, schema_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "TRIAL: 1\n",
      "--------------------------------------------------\n",
      "WITH current_month AS (\n",
      "  SELECT DATE_TRUNC(CURRENT_DATE(), MONTH) AS start_date,\n",
      "         DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH) AS end_date\n",
      ")\n",
      "\n",
      "SELECT \n",
      "  c.customer_id, \n",
      "  c.first_name, \n",
      "  c.last_name,\n",
      "  DATEDIFF(CURRENT_DATE(), c.date_of_birth) AS age, \n",
      "  r.reservation_id,\n",
      "  r.status\n",
      "FROM current_month AS cm\n",
      "JOIN flight_reservations.reservations AS r\n",
      "ON r.reservation_datetime BETWEEN cm.start_date AND cm.end_date\n",
      "AND r.status = 'Confirmed'\n",
      "JOIN flight_reservations.customers AS c\n",
      "ON c.customer_id = r.customer_id\n",
      "WHERE DATEDIFF(CURRENT_DATE(), c.date_of_birth) >= 18\n",
      "ORDER BY age DESC;\n",
      "--------------------------------------------------\n",
      "FAILED\n",
      "====================================================================================================\n",
      "TRIAL: 2\n",
      "--------------------------------------------------\n",
      "  SELECT DATE_TRUNC(CURRENT_DATE(), MONTH) AS start_date,\n",
      "         DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH) AS end_date\n",
      ")\n",
      "\n",
      "SELECT \n",
      "  c.customer_id, \n",
      "  c.first_name, \n",
      "  c.last_name,\n",
      "  DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) AS age, \n",
      "  r.reservation_id,\n",
      "  r.status\n",
      "FROM current_month AS cm\n",
      "JOIN flight_reservations.reservations AS r\n",
      "ON r.reservation_datetime BETWEEN cm.start_date AND cm.end_date\n",
      "AND r.status = 'Confirmed'\n",
      "JOIN flight_reservations.customers AS c\n",
      "ON c.customer_id = r.customer_id\n",
      "WHERE DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) >= 18\n",
      "--------------------------------------------------\n",
      "FAILED\n",
      "====================================================================================================\n",
      "TRIAL: 3\n",
      "--------------------------------------------------\n",
      "WITH current_month AS (\n",
      "  SELECT DATE_TRUNC(CURRENT_DATE(), MONTH) AS start_date,\n",
      "         DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH) AS end_date\n",
      ")\n",
      "\n",
      "SELECT \n",
      "  c.customer_id, \n",
      "  c.first_name, \n",
      "  c.last_name,\n",
      "  DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) AS age, \n",
      "  r.reservation_id,\n",
      "  r.status\n",
      "FROM current_month AS cm\n",
      "JOIN flight_reservations.reservations AS r\n",
      "ON r.reservation_datetime BETWEEN cm.start_date AND cm.end_date\n",
      "AND r.status = 'Confirmed'\n",
      "JOIN flight_reservations.customers AS c\n",
      "ON c.customer_id = r.customer_id\n",
      "WHERE DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) >= 18\n",
      "ORDER BY age DESC;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n",
      "TRIAL: 4\n",
      "--------------------------------------------------\n",
      "  SELECT DATE_TRUNC(CURRENT_DATE(), MONTH) AS start_date,\n",
      "         DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH) AS end_date\n",
      ")\n",
      "\n",
      "SELECT \n",
      "  c.customer_id, \n",
      "  c.first_name, \n",
      "  c.last_name,\n",
      "  DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) AS age, \n",
      "  r.reservation_id,\n",
      "  r.status\n",
      "FROM current_month AS cm\n",
      "JOIN flight_reservations.reservations AS r\n",
      "ON r.reservation_datetime BETWEEN cm.start_date AND cm.end_date\n",
      "AND r.status = 'Confirmed'\n",
      "JOIN flight_reservations.customers AS c\n",
      "ON c.customer_id = r.customer_id\n",
      "WHERE DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) >= 18\n",
      "--------------------------------------------------\n",
      "FAILED\n",
      "====================================================================================================\n",
      "TRIAL: 5\n",
      "--------------------------------------------------\n",
      "WITH current_month AS (\n",
      "  SELECT DATE_TRUNC(CURRENT_DATE(), MONTH) AS start_date,\n",
      "         DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH) AS end_date\n",
      ")\n",
      "\n",
      "SELECT \n",
      "  c.customer_id, \n",
      "  c.first_name, \n",
      "  c.last_name,\n",
      "  DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) AS age, \n",
      "  r.reservation_id,\n",
      "  r.status\n",
      "FROM current_month AS cm\n",
      "JOIN flight_reservations.reservations AS r\n",
      "ON r.reservation_datetime BETWEEN cm.start_date AND cm.end_date\n",
      "AND r.status = 'Confirmed'\n",
      "JOIN flight_reservations.customers AS c\n",
      "ON c.customer_id = r.customer_id\n",
      "WHERE DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) >= 18\n",
      "ORDER BY age DESC;\n",
      "--------------------------------------------------\n",
      "SUCCEEDED\n",
      "====================================================================================================\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 182 ms, sys: 60.7 ms, total: 243 ms\n",
      "Wall time: 53.1 s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Query</th>\n",
       "      <th>Result</th>\n",
       "      <th>Latency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>WITH current_month AS (\\n  SELECT DATE_TRUNC(CURRENT_DATE(), MONTH) AS start_date,\\n         DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH) AS end_date\\n)\\n\\nSELECT \\n  c.customer_id, \\n  c.first_name, \\n  c.last_name,\\n  DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) AS age, \\n  r.reservation_id,\\n  r.status\\nFROM current_month AS cm\\nJOIN flight_reservations.reservations AS r\\nON r.reservation_datetime BETWEEN cm.start_date AND cm.end_date\\nAND r.status = 'Confirmed'\\nJOIN flight_reservations.customers AS c\\nON c.customer_id = r.customer_id\\nWHERE DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) &gt;= 18\\nORDER BY age DESC;</td>\n",
       "      <td>customer_id first_name last_name  age  reservation_id     status\n",
       "0           17     Olivia    Newton   75              17  Confirmed\n",
       "1           16       Nick      Fury   58              16  Confirmed\n",
       "2           13       Kate   Winslet   48              14  Confirmed\n",
       "3           20       Ryan  Reynolds   47              20  Confirmed\n",
       "4           15       Mary      Jane   30              15  Confirmed\n",
       "5           18      Peter    Parker   22              18  Confirmed</td>\n",
       "      <td>12.984444</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>WITH current_month AS (\\n  SELECT DATE_TRUNC(CURRENT_DATE(), MONTH) AS start_date,\\n         DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH) AS end_date\\n)\\n\\nSELECT \\n  c.customer_id, \\n  c.first_name, \\n  c.last_name,\\n  DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) AS age, \\n  r.reservation_id,\\n  r.status\\nFROM current_month AS cm\\nJOIN flight_reservations.reservations AS r\\nON r.reservation_datetime BETWEEN cm.start_date AND cm.end_date\\nAND r.status = 'Confirmed'\\nJOIN flight_reservations.customers AS c\\nON c.customer_id = r.customer_id\\nWHERE DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) &gt;= 18\\nORDER BY age DESC;</td>\n",
       "      <td>customer_id first_name last_name  age  reservation_id     status\n",
       "0           17     Olivia    Newton   75              17  Confirmed\n",
       "1           16       Nick      Fury   58              16  Confirmed\n",
       "2           13       Kate   Winslet   48              14  Confirmed\n",
       "3           20       Ryan  Reynolds   47              20  Confirmed\n",
       "4           15       Mary      Jane   30              15  Confirmed\n",
       "5           18      Peter    Parker   22              18  Confirmed</td>\n",
       "      <td>15.977762</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     Query                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       Result    Latency\n",
       "0  WITH current_month AS (\\n  SELECT DATE_TRUNC(CURRENT_DATE(), MONTH) AS start_date,\\n         DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH) AS end_date\\n)\\n\\nSELECT \\n  c.customer_id, \\n  c.first_name, \\n  c.last_name,\\n  DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) AS age, \\n  r.reservation_id,\\n  r.status\\nFROM current_month AS cm\\nJOIN flight_reservations.reservations AS r\\nON r.reservation_datetime BETWEEN cm.start_date AND cm.end_date\\nAND r.status = 'Confirmed'\\nJOIN flight_reservations.customers AS c\\nON c.customer_id = r.customer_id\\nWHERE DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) >= 18\\nORDER BY age DESC;     customer_id first_name last_name  age  reservation_id     status\n",
       "0           17     Olivia    Newton   75              17  Confirmed\n",
       "1           16       Nick      Fury   58              16  Confirmed\n",
       "2           13       Kate   Winslet   48              14  Confirmed\n",
       "3           20       Ryan  Reynolds   47              20  Confirmed\n",
       "4           15       Mary      Jane   30              15  Confirmed\n",
       "5           18      Peter    Parker   22              18  Confirmed  12.984444\n",
       "1  WITH current_month AS (\\n  SELECT DATE_TRUNC(CURRENT_DATE(), MONTH) AS start_date,\\n         DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH) AS end_date\\n)\\n\\nSELECT \\n  c.customer_id, \\n  c.first_name, \\n  c.last_name,\\n  DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) AS age, \\n  r.reservation_id,\\n  r.status\\nFROM current_month AS cm\\nJOIN flight_reservations.reservations AS r\\nON r.reservation_datetime BETWEEN cm.start_date AND cm.end_date\\nAND r.status = 'Confirmed'\\nJOIN flight_reservations.customers AS c\\nON c.customer_id = r.customer_id\\nWHERE DATE_DIFF(CURRENT_DATE(), c.date_of_birth, YEAR) >= 18\\nORDER BY age DESC;     customer_id first_name last_name  age  reservation_id     status\n",
       "0           17     Olivia    Newton   75              17  Confirmed\n",
       "1           16       Nick      Fury   58              16  Confirmed\n",
       "2           13       Kate   Winslet   48              14  Confirmed\n",
       "3           20       Ryan  Reynolds   47              20  Confirmed\n",
       "4           15       Mary      Jane   30              15  Confirmed\n",
       "5           18      Peter    Parker   22              18  Confirmed  15.977762"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "response = generate_and_execute_sql(prompt=prompt, return_all=True)\n",
    "sql_output = response['dataframe']\n",
    "sql_output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>first_name</th>\n",
       "      <th>last_name</th>\n",
       "      <th>age</th>\n",
       "      <th>reservation_id</th>\n",
       "      <th>status</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>17</td>\n",
       "      <td>Olivia</td>\n",
       "      <td>Newton</td>\n",
       "      <td>75</td>\n",
       "      <td>17</td>\n",
       "      <td>Confirmed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>16</td>\n",
       "      <td>Nick</td>\n",
       "      <td>Fury</td>\n",
       "      <td>58</td>\n",
       "      <td>16</td>\n",
       "      <td>Confirmed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>13</td>\n",
       "      <td>Kate</td>\n",
       "      <td>Winslet</td>\n",
       "      <td>48</td>\n",
       "      <td>14</td>\n",
       "      <td>Confirmed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>20</td>\n",
       "      <td>Ryan</td>\n",
       "      <td>Reynolds</td>\n",
       "      <td>47</td>\n",
       "      <td>20</td>\n",
       "      <td>Confirmed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>15</td>\n",
       "      <td>Mary</td>\n",
       "      <td>Jane</td>\n",
       "      <td>30</td>\n",
       "      <td>15</td>\n",
       "      <td>Confirmed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>18</td>\n",
       "      <td>Peter</td>\n",
       "      <td>Parker</td>\n",
       "      <td>22</td>\n",
       "      <td>18</td>\n",
       "      <td>Confirmed</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id first_name last_name  age  reservation_id     status\n",
       "0           17     Olivia    Newton   75              17  Confirmed\n",
       "1           16       Nick      Fury   58              16  Confirmed\n",
       "2           13       Kate   Winslet   48              14  Confirmed\n",
       "3           20       Ryan  Reynolds   47              20  Confirmed\n",
       "4           15       Mary      Jane   30              15  Confirmed\n",
       "5           18      Peter    Parker   22              18  Confirmed"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_df = sql_output.loc[0, 'Result']\n",
    "result_df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".bq-sql-agent",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
